Load the data
## will not work, because the csv is format is absolutely messed up
#climate_data <- read_csv2('https://www.wien.gv.at/statistik/ogd/jahrbuch/klimaundwetter/tab_1.3.2_klimaundwetter_.csv')
read_file('https://www.wien.gv.at/statistik/ogd/jahrbuch/klimaundwetter/tab_1.3.2_klimaundwetter_.csv') %>%
# remove title line
str_remove('^.*\r\n') %>%
# remove footer lines
str_remove('Quelle(.|\n|\r)*') %>%
# remove weird reference to 2nd footer line
str_replace('\\s+\\(1\\)(.|\n|\r)*%', ';sun_percent') %>%
# switch decimal character
str_replace_all(',', '.') %>%
# use comma as separator
str_replace_all(';', ',') %>%
write_file('climate_data.csv')
climate_data <- read_csv('climate_data.csv', na='-') %>%
# translate to english
rename(
Year = Jahr
, `SummerDays` = Sommertage
, `FrostDays` = Frosttage
, `IceDays` = Eistage
, `HeatDays` = Hitzetage
) %>%
mutate(
IceDays = -IceDays
, FrostDays = -FrostDays
)
## Rows: 74 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (7): Jahr, Frosttage, Eistage, Sommertage, Hitzetage, Sonnenscheindauer,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
climate_data
## # A tibble: 74 × 7
## Year FrostDays IceDays SummerDays HeatDays Sonnenscheindauer sun_percent
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1950 -62 -31 65 8 1959 45.8
## 2 1951 -51 -10 51 1 1898 44.4
## 3 1952 -93 -26 46 13 1847 43.2
## 4 1953 -79 -20 59 7 2140 50.0
## 5 1954 -79 -40 41 7 1784 41.7
## 6 1955 -91 -26 25 1 1710 40.0
## 7 1956 -102 -47 34 3 1937 45.3
## 8 1957 -56 -32 41 10 1822 42.6
## 9 1958 -77 -12 38 9 1767 41.3
## 10 1959 -63 -16 36 5 1964 45.9
## # ℹ 64 more rows
Visualize the data
Area Chart
fig <- plot_ly(
climate_data
, x = ~Year
, y = ~SummerDays
, name = 'Summer Days'
, type = 'scatter'
, mode = 'none'
, stackgroup = 'positive'
, fillcolor = 'orange'
)
fig <- fig %>% add_trace(y = ~HeatDays, name = 'Heat Days', fillcolor = 'red')
fig <- fig %>% add_trace(y = ~FrostDays, name = 'Frost Days', fillcolor = 'lightblue', stackgroup = "negative")
fig <- fig %>% add_trace(y = ~IceDays, name = 'Ice Days', fillcolor = 'blue', stackgroup = "negative")
fig <- fig %>% layout(
title = 'Climatological Index Days in Vienna, Austria, 1950-2023'
, xaxis = list(title = 'Year')
, yaxis = list(title = 'Number of Days')
, legend = list(x = 0.05, y = 0.95)
, hovermode = "x unified"
)
fig
## Warning: Ignoring 1 observations
api_create(fig, filename = 'climate_index_days_vienna', fileopt='overwrite')
## Warning: Ignoring 1 observations
## Found a grid already named: 'climate_index_days_vienna Grid'. Since fileopt='overwrite', I'll try to update it
## Found a plot already named: 'climate_index_days_vienna'. Since fileopt='overwrite', I'll try to update it
Regression
fig <- climate_data %>%
select(Year:HeatDays) %>%
pivot_longer(cols = -Year, names_to = 'Index', values_to = 'Days') %>%
mutate(
Days = abs(Days)
, Index = recode(Index
, `SummerDays` = 'Summer Days'
, `FrostDays` = 'Frost Days'
, `IceDays` = 'Ice Days'
, `HeatDays` = 'Heat Days'
)
, Index = factor(Index, levels = c('Heat Days', 'Summer Days', 'Frost Days', 'Ice Days'))
) %>%
ggplot(aes(x = Year, y = Days, color = Index)) +
geom_point(alpha=0.5) +
theme_minimal() +
geom_smooth(method = 'lm', se = FALSE) +
scale_color_manual(values = c('red', 'orange', 'lightblue', 'blue')) +
labs(
title = 'Climatological Index Days in Vienna, Austria, 1950-2023'
, x = 'Year'
, y = 'Number of Days'
, color = 'Index'
)
fig <- fig %>% ggplotly()
## `geom_smooth()` using formula = 'y ~ x'
fig <- fig %>% layout(
legend = list(x = 0.45, y = 1)
)
fig
api_create(fig, filename = 'climate_index_days_vienna_regression', fileopt='overwrite')
## Found a grid already named: 'climate_index_days_vienna_regression Grid'. Since fileopt='overwrite', I'll try to update it
## Found a plot already named: 'climate_index_days_vienna_regression'. Since fileopt='overwrite', I'll try to update it
get regression coefficients
climate_data %>%
select(Year:HeatDays) %>%
pivot_longer(cols = -Year, names_to = 'Index', values_to = 'Days') %>%
mutate(
Days = abs(Days)
, Index = recode(Index
, `SummerDays` = 'Summer Days'
, `FrostDays` = 'Frost Days'
, `IceDays` = 'Ice Days'
, `HeatDays` = 'Heat Days'
)
, Index = factor(Index, levels = c('Heat Days', 'Summer Days', 'Frost Days', 'Ice Days'))
) %>%
group_by(Index) %>%
summarise(
Intercept = lm(Days ~ Year)$coefficients[1]
, Slope = lm(Days ~ Year)$coefficients[2]
, p_value = summary(lm(Days ~ Year))$coefficients[2, 4]
)
## # A tibble: 4 × 4
## Index Intercept Slope p_value
## <fct> <dbl> <dbl> <dbl>
## 1 Heat Days -619. 0.319 6.48e-11
## 2 Summer Days -1376. 0.721 1.01e-15
## 3 Frost Days 871. -0.404 1.44e- 5
## 4 Ice Days 508. -0.245 8.40e- 5